1 Introduction

Update of explore-JULES-ES-1p0.Rmd that uses the new packages and functions

We build and test gaussian process emulators, perform a sensitivity analyis, and constrain the input space of Jules.

Andy thinks there might be mileage in analysing the atmospheric growth, which is here. /home/h01/hadaw/Research/ES_PPE/Oct20

At the moment, this vignette is hampered by the fact that emulators are failing on a few of the outputs which represent change over the historical period. The emulator is fine for predicting absolute values in the modern period.

Andy would like to see timeseries of: cVeg, cSoil and nbp, npp in GtC and GtC/yr.

1.1 Preliminaries

Load libraries, functions and data.

# Load helper functions

knitr::opts_chunk$set(fig.path = "figs/", echo = TRUE, message = FALSE, warnings = FALSE)
# load some helper functions
source('~/brazilCSSP/code/brazil_cssp/per_pft.R') # eventually, move the relevant functions
source('explore-JULES-ES-1p0_PPE_functions.R')
# Load packages

library(RColorBrewer)
library(fields)
library(MASS)
library(DiceKriging)
library(DiceEval)
library(ncdf4)
library(ncdf4.helpers)

library(foreach)

library(emtools)
library(imptools)
library(viztools)
library(julesR)
# Some pallete options
yg = brewer.pal(9, "YlGn")
ryb = brewer.pal(11, "RdYlBu")
byr = rev(ryb)
rb = brewer.pal(11, "RdBu")
br = rev(rb)
blues = brewer.pal(9, 'Blues')
cbPal <- c("#000000", "#E69F00", "#56B4E9", "#009E73", "#F0E442", "#0072B2", "#D55E00", "#CC79A7")

ysec = 60*60*24*365
years <- 1850:2013

1.1.1 Jules output data location

ensloc <- '/project/carbon_ppe/JULES-ES-1p0_PPE/'
ensmember <- 'P0000/'
subdir <- 'stats/'

floc <- paste0(ensloc,ensmember,subdir)
fn <- 'JULES-ES-1p0_P0000_Annual_global.nc'

# test file
nc <- nc_open(paste0(floc,fn))

# What variables are in the file? 
varlist <- nc.get.variable.list(nc)


for(i in 1:length(varlist)){
  
  var <- varlist[i]
  print(var)
  print(ncatt_get(nc,var,"long_name")[[2]])
}
## [1] "nbp_lnd_sum"
## [1] "Carbon Mass Flux out of Atmosphere Due to Net Biospheric Production on Land"
## [1] "year"
## [1] "year"
## [1] "nbp_lnd_mean"
## [1] "Carbon Mass Flux out of Atmosphere Due to Net Biospheric Production on Land"
## [1] "fLuc_lnd_sum"
## [1] "Net Carbon Mass Flux into Atmosphere Due to Land-Use Change"
## [1] "fLuc_lnd_mean"
## [1] "Net Carbon Mass Flux into Atmosphere Due to Land-Use Change"
## [1] "npp_nlim_lnd_sum"
## [1] "Net Primary Production on Land as Carbon Mass Flux"
## [1] "npp_nlim_lnd_mean"
## [1] "Net Primary Production on Land as Carbon Mass Flux"
## [1] "cSoil_lnd_sum"
## [1] "carbon mass in model soil pool"
## [1] "cSoil_lnd_mean"
## [1] "carbon mass in model soil pool"
## [1] "cVeg_lnd_sum"
## [1] "carbon mass in vegetation"
## [1] "cVeg_lnd_mean"
## [1] "carbon mass in vegetation"
## [1] "landCoverFrac_lnd_sum"
## [1] "Plant Functional Type Grid Fraction"
## [1] "landCoverFrac_lnd_mean"
## [1] "Plant Functional Type Grid Fraction"
## [1] "fHarvest_lnd_sum"
## [1] "Carbon Mass Flux into Atmosphere Due to Crop Harvesting"
## [1] "fHarvest_lnd_mean"
## [1] "Carbon Mass Flux into Atmosphere Due to Crop Harvesting"
## [1] "lai_lnd_sum"
## [1] "leaf area index"
## [1] "lai_lnd_mean"
## [1] "leaf area index"
## [1] "rh_lnd_sum"
## [1] "Total Heterotrophic Respiration on Land as Carbon Mass Flux"
## [1] "rh_lnd_mean"
## [1] "Total Heterotrophic Respiration on Land as Carbon Mass Flux"
## [1] "treeFrac_lnd_sum"
## [1] "Fractional cover of each surface type"
## [1] "treeFrac_lnd_mean"
## [1] "Fractional cover of each surface type"
## [1] "c3PftFrac_lnd_sum"
## [1] "Percentage Cover by C3 Plant Functional Type"
## [1] "c3PftFrac_lnd_mean"
## [1] "Percentage Cover by C3 Plant Functional Type"
## [1] "c4PftFrac_lnd_sum"
## [1] "Percentage Cover by C3 Plant Functional Type"
## [1] "c4PftFrac_lnd_mean"
## [1] "Percentage Cover by C3 Plant Functional Type"
## [1] "shrubFrac_lnd_sum"
## [1] "Percentage Cover by Shrub"
## [1] "shrubFrac_lnd_mean"
## [1] "Percentage Cover by Shrub"
## [1] "baresoilFrac_lnd_sum"
## [1] "Bare Soil Percentage Area Coverage"
## [1] "baresoilFrac_lnd_mean"
## [1] "Bare Soil Percentage Area Coverage"
## [1] "residualFrac_lnd_sum"
## [1] "Percentage of Grid Cell That Is Land but neither Vegetation Covered nor Bare Soil"
## [1] "residualFrac_lnd_mean"
## [1] "Percentage of Grid Cell That Is Land but neither Vegetation Covered nor Bare Soil"

1.2 Extract a time series of an annual, globally aggregated variable

extractTimeseries <- function(nc, variable){
    dat <- ncvar_get(nc, variable)
    out <- dat
    out
}

makeTimeseriesEnsemble <- function(variable, nens = 499, nts = 164, cn = 1850:2013){
  
  # nens is number of ensemble members
  # nts length of timeseries
  # cn is colnames()
  datmat <- matrix(NA, nrow = nens, ncol = nts)
  colnames(datmat) <- cn
  
  enslist <- paste("P", formatC(0:(nens-1), width=4, flag="0"), sep="")
  floc <- paste0(ensloc,ensmember,subdir)
  
  for(i in 1:nens){
    
    vec <- rep(NA,nts)
    
    ensmember <- enslist[i] 
    
    fn <- paste0(ensloc,ensmember,'/stats/','JULES-ES-1p0_',ensmember,'_Annual_global.nc')
 
    
    try(nc <- nc_open(paste0(fn)))
    try(dat <- extractTimeseries(nc, variable))
    
    datmat[i, ] <- dat
    nc_close(nc)
  }
  datmat
}

1.3 Plot timeseries of aggregated global timeseries

par(mfrow= c(1,4), las = 1)

plotcol <- c(makeTransparent('black', 70))

ylim = c(-10, 10)
matplot(years, t(nbp_ens), type = 'l', lty = 'solid',ylim = ylim, col = plotcol, 
        ylab = 'GtC/year', main = 'NBP',
        bty = 'n')

ylim = range(c(npp_ens[,1], npp_ens[,164]))
matplot(years, t(npp_ens), type = 'l', lty = 'solid',ylim = ylim, col = plotcol,
        ylab = 'GtC/year', main = 'NPP',
        bty = 'n')

ylim = range(c(cSoil_ens[,1], cSoil_ens[,164]))
matplot(years, t(cSoil_ens), type = 'l', lty = 'solid',ylim = ylim, col = plotcol,
        ylab = 'GtC', main = 'Soil Carbon',
        bty = 'n')

ylim = range(c(cVeg_ens[,1], cVeg_ens[,164]))
matplot(years, t(cVeg_ens), type = 'l', lty = 'solid',ylim = ylim, col = plotcol,
        ylab = 'GtC', main = 'Vegetation Carbon',
        bty = 'n')

par(mfrow= c(1,4), las = 1)

ylim = range(lai_lnd_mean_ens)
matplot(years, t(lai_lnd_mean_ens), type = 'l', lty = 'solid',ylim = ylim, col = plotcol,
        ylab = 'LAI', main = 'Leaf area index',
        bty = 'n')

ylim = range(treeFrac_lnd_mean_ens)
matplot(years, t(treeFrac_lnd_mean_ens), type = 'l', lty = 'solid',ylim = ylim, col = plotcol,
        ylab = 'fraction', main = 'Tree fraction',
        bty = 'n')

ylim = range(shrubFrac_lnd_mean_ens)
matplot(years, t(shrubFrac_lnd_mean_ens ), type = 'l', lty = 'solid',ylim = ylim, col = plotcol,
        ylab = 'fraction', main = 'Shrub fraction',
        bty = 'n')

ylim = range(baresoilFrac_lnd_mean_ens)
matplot(years, t(baresoilFrac_lnd_mean_ens ), type = 'l', lty = 'solid',ylim = ylim, col = plotcol,
        ylab = 'fraction', main = 'bare soil fraction',
        bty = 'n')

par(mfrow= c(1,3), las = 1)
  
ylim = range(rh_lnd_sum_ens)
matplot(years, t(rh_lnd_sum_ens), type = 'l', lty = 'solid',ylim = ylim, col = plotcol,
        ylab = 'fraction', main = 'Heterotrophic Respiration',
        bty = 'n')

ylim = range(fLuc_lnd_sum_ens)
matplot(years, t(fLuc_lnd_sum_ens), type = 'l', lty = 'solid',ylim = ylim, col = plotcol,
        ylab = 'GtC/year', main = 'Land use change emissions',
        bty = 'n')

ylim = range(fHarvest_lnd_sum_ens)
matplot(years, t(fHarvest_lnd_sum_ens), type = 'l', lty = 'solid',ylim = ylim, col = plotcol,
        ylab = 'GtC/year', main = 'Harvest C flux to atmosphere',
        bty = 'n')

npp_ens_anom <- anomalizeTSmatrix(npp_ens, 1:20)
nbp_ens_anom <- anomalizeTSmatrix(nbp_ens, 1:20)
cSoil_ens_anom <- anomalizeTSmatrix(cSoil_ens, 1:20)
cVeg_ens_anom <- anomalizeTSmatrix(cVeg_ens, 1:20)
par(mfrow= c(1,4), las = 1)

ylim = range(-10, 10)

matplot(years, t(nbp_ens_anom), type = 'l', lty = 'solid',ylim = ylim, col = plotcol,
        ylab = 'NBP sum Anomaly', main = 'NBP Anomaly',
        bty = 'n')

ylim = range(c(npp_ens_anom[,1], npp_ens_anom[,164]))
matplot(years, t(npp_ens_anom), type = 'l', lty = 'solid',ylim = ylim, col = plotcol,
        ylab = 'NPP sum Anomaly', main = 'NPP Anomaly',
        bty = 'n')

ylim = range(c(cSoil_ens_anom[,1], cSoil_ens_anom[,164]))
matplot(years, t(cSoil_ens_anom), type = 'l', lty = 'solid',ylim = ylim, col = plotcol,
        ylab = 'Soil carbon sum anomaly', main = 'Soil Carbon Anomaly',
        bty = 'n')

ylim = range(c(cVeg_ens_anom[,1], cVeg_ens_anom[,164]))
matplot(years, t(cVeg_ens_anom), type = 'l', lty = 'solid',ylim = ylim, col = plotcol,
        ylab = 'Veg carbon sum anomaly', main = 'Vegetation Carbon Anomaly',
        bty = 'n')

1.4 Function to extract the “modern value” direct from the file (last 20 years of the timeseries)

# for each of the variables in the file, average the last 20 years as the "modern" value,
# and then place in a matrix

modernValue <- function(nc, variable, ix){
  # A basic function to read a variable and 
  # take the mean of the timeseries at locations ix
  dat <- ncvar_get(nc, variable)
  out <- mean(dat[ix])
  out
}
#144:164 is the 1993:2013
modernValue(nc = nc, variable = "npp_nlim_lnd_mean", ix = 144:164)

# apply to the test file to check it works
vec <- sapply(varlist, FUN = modernValue, nc = nc, ix = 144:164)

1.5 Loop to extract the “modern value” of a number of model outputs

# Generate ensemble numbers, mean of the last 20 years of the timeseries (1994-2013)

if (file.exists("ensemble.rdata")) {
  load("ensemble.rdata")
} else {
  
  nens = 499
  datmat <- matrix(nrow = nens, ncol = length(varlist))
  colnames(datmat) <- varlist
  
  enslist <- paste("P", formatC(0:(nens-1), width=4, flag="0"), sep="")
  floc <- paste0(ensloc,ensmember,subdir)
  
  for(i in 1:nens){
    
    vec <- rep(NA, length(varlist))
    
    ensmember <- enslist[i] 
    
    fn <- paste0(ensloc,ensmember,'/stats/','JULES-ES-1p0_',ensmember,'_Annual_global.nc')
    #print(fn)
    
    try(nc <- nc_open(paste0(fn)))
    try(vec <- sapply(varlist, FUN = modernValue, nc = nc, ix = 144:164))
    datmat[i, ] <- vec
    nc_close(nc)
  }
  
  save(nens, datmat,enslist,floc, file ="ensemble.rdata")
}

1.6 Calculate an ensemble of anomalies for all variables

For each ensemble member and each variable, calculate the change from the 20 years at the start of the run, to the twenty years at the end of the run.

tsAnomaly <- function(nc, variable, startix = 1:20, endix = 144:164){

  # A basic function to read a variable and calculate the anomaly at the end of the run
  dat <- ncvar_get(nc, variable)
  endMean <- mean(dat[endix])
  startMean <- mean(dat[startix])
  out <- endMean - startMean
  out
}

tsAnomaly(nc = nc, variable = "npp_nlim_lnd_mean")
## [1] 2.410932e-09
# Generate ensemble  mean of the last 20 years of the timeseries (1994-2013)

if (file.exists("anomaly_ensemble.rdata")) {
  load("anomaly_ensemble.rdata")
} else {
  
  nens = 499
  datmatAnom <- matrix(nrow = nens, ncol = length(varlist))
  colnames(datmatAnom) <- varlist
  
  enslist <- paste("P", formatC(0:(nens-1), width=4, flag="0"), sep="")
  floc <- paste0(ensloc,ensmember,subdir)
  
  for(i in 1:nens){
    
    vec <- rep(NA, length(varlist))
    
    ensmember <- enslist[i] 
    
    fn <- paste0(ensloc,ensmember,'/stats/','JULES-ES-1p0_',ensmember,'_Annual_global.nc')
    #print(fn)
    
    try(nc <- nc_open(paste0(fn)))
    try(vec <- sapply(varlist, FUN = tsAnomaly, nc = nc))
    datmatAnom[i, ] <- vec
    nc_close(nc)
  }
  
  save(nens, datmatAnom, enslist,floc, file ="anomaly_ensemble.rdata")
}

1.7 Clean data sets to “level 0”

Initial clean of data set, removing variables that don’t change, and removing NAs (models that didn’t run).

Y_nlevel0_ix <- which(is.na(datmat[,'year']))

YAnom_nlevel0_ix <- which(is.na(datmatAnom[,'year']))

# This should be true to proceed, or we'll have to start excluding the combined set.
identical(Y_nlevel0_ix, YAnom_nlevel0_ix)
## [1] TRUE
# Y is the whole data set
Y <- datmat
# Y_level0 is the 'cleaned' data set, truncated to variables that change, and removing NAs
Y_level0 <- datmat[-Y_nlevel0_ix, -c(2,30,31)]
Y_nlevel0 <- datmat[Y_nlevel0_ix, -c(2, 30, 31)]


# Y is the whole data set
YAnom <- datmatAnom
# Y.level0 is the 'cleaned' data set, truncated to variables that change, and removing NAs
YAnom_level0 <- datmatAnom[-YAnom_nlevel0_ix, -c(2,30,31)]
YAnom_nlevel0 <- datmatAnom[YAnom_nlevel0_ix, -c(2,30,31)]
# load the original design and input space, normalize to [0-1]

# Load up the data
lhs_i = read.table('~/brazilCSSP/code/brazil_cssp/analyze_u-ao732/data/ES_PPE_i/lhs_u-ao732.txt', header = TRUE)
lhs_ii = read.table('~/brazilCSSP/code/brazil_cssp/analyze_u-ao732/data/ES_PPE_ii/lhs_u-ao732a.txt', header = TRUE)

toplevel_ix = 1:499

# The raw input data is a latin hypercube
lhs = rbind(lhs_i, lhs_ii)[toplevel_ix, ]
lhs_level0 <- lhs[-Y_nlevel0_ix,]

X = normalize(lhs)
colnames(X) = colnames(lhs)

X_level0 <- X[-Y_nlevel0_ix,]
X_nlevel0 <- X[Y_nlevel0_ix,]

d = ncol(X)
# lower and higher bound on the normalised matrix for visualisation
rx = rbind(rep(0,32), rep(1,32))

1.8 Where did the model fail to run?

(“probability of run failure” Could be an interesting project - logit transformation for probability of run failure? Some other ML like SVM?

There are clear run failure thresholds in the parameters rootd_ft_io and lai_max, and quite strong visual indications that a_wl_io and bio_hum_cn matter.

#simple way
par(oma = c(10,10,0,0))
#par(xpd = TRUE)
pairs(X_nlevel0, 
      xlim = c(0,1), ylim = c(0,1),
      labels = 1:d,
      col = makeTransparent('red', 150),
      gap = 0,
      pch = 20,
      xaxt ='n', yaxt = 'n',
      lower.panel = NULL)

reset()

legend('left', legend = paste(1:d, colnames(lhs)), cex = 1.2, bty = 'n')

1.9 exit knitr

knitr::knit_exit()